 
#---------------------------------------------------------------------------------------
# regular R functions to be used in S4 functions

# reads a table in a fast way to a dataframe
.readTableFast<-function(filename,header=T,skip=0,sep="")
{
  tab5rows <- read.table(filename, header = header,skip=skip,sep=sep, nrows = 100)
  classes  <- sapply(tab5rows, class)
  return( read.table(filename, header = header,skip=skip,sep=sep, colClasses = classes)  )
}

# reformats a data.frame to a standard methylraw data.frame
# no matter what the alignment pipeline
.structureAMPoutput<-function(data)
{  
  strand=rep("+",nrow(data))
  strand[data[,4]=="R"]="-"
  numCs=round(data[,5]*data[,6]/100)
  numTs=round(data[,5]*data[,7]/100)
  
  data.frame(id=data[,1],chr=data[,2],start=data[,3],end=data[,3]
             ,strand=strand,coverage=data[,5],numCs=numCs,numTs=numTs)
}

# reformats a generic structure data.frame to a standard methylraw data.frame
# based on the column number assignment and if freqC is fraction or not.
.structureGeneric<-function(data, pipeline)
{
    fraction=pipeline$fraction
    chr.col=pipeline$chr.col
    start.col=pipeline$start.col
    end.col=pipeline$end.col
    coverage.col=pipeline$coverage.col
    strand.col=pipeline$strand.col
    freqC.col=pipeline$freqC.col
    
    strand=rep("+",nrow(data))
    strand[data[,strand.col]=="R" | data[,strand.col]=="-"]="-"
    adj=ifelse(fraction, 1, 100)
    numCs=round(data[,coverage.col]*data[,freqC.col]/adj)
    numTs=data[,coverage.col] - numCs
    id=paste(data[,chr.col], data[,start.col],sep=".")
    
    data.frame(id=id,chr=data[,chr.col],start=data[,start.col],end=data[,end.col]
    ,strand=strand,coverage=data[,coverage.col],numCs=numCs,numTs=numTs)
    
}

.check.pipeline.list<-function(pipeline){
    if(!all(c("fraction", "chr.col", "start.col", "end.col", "coverage.col", "strand.col", "freqC.col") %in% names(pipeline))){
        stop("Miss components for pipeline for the generic read. Try amp, or bismark, or give the correct format of pipeline list for generic read!")
    }
    
    values=c(pipeline$chr.col, pipeline$start.col, pipeline$coverage.col, pipeline$strand.col, pipeline$freqC.col)
    if(any(duplicated(values))){
        stop("Find duplicated column number among chr.col, start.col, coverage.col, strand.col, freqC.col!")
    }
}

# unfies forward and reverse strand CpGs on the forward strand if the if both are on the same CpG
# if that's the case their values are generally correlated
.CpG.dinuc.unify<-function(cpg)
{

  cpgR=cpg[cpg$strand=="-",]
  cpgF=cpg[cpg$strand=="+",]
  cpgR$start=cpgR$start-1
  cpgR$end=cpgR$end-1
  cpgR$strand="+"
  
  cpgR$id=paste(cpgR$chr,cpgR$start,sep=".")

  cpgFR=merge(cpgF,cpgR,by="id")
  #hemi =cpgFR[abs(cpgFR$freqC.x-cpgFR$freqC.y)>=50,]
  #cpgFR=cpgFR[abs(cpgFR$freqC.x-cpgFR$freqC.y)<50,]  
  res=data.frame(
                 id =as.character(cpgFR$id),
                 chr     =as.character(cpgFR$chr.x),
                 start    =(cpgFR$start.x),
                 end      =(cpgFR$start.x),
                 strand  =rep("+",nrow(cpgFR)),
                 coverage=cpgFR$coverage.x + cpgFR$coverage.y,
                 numCs   =cpgFR$numCs.x + cpgFR$numCs.y ,
                 numTs   =cpgFR$numTs.x + cpgFR$numTs.y ,stringsAsFactors =F
  )
  res=rbind(res, cpgF[ !cpgF$id  %in%  res$id,],cpgR[ !cpgR$id  %in%  res$id,] )
  res=res[order(res$id),]
  return(res)
}


# end of regular functions to be used in S4 functions
#---------------------------------------------------------------------------------------



valid.methylRawObj <- function(object) {
  
    
    data=getData(object)
    check1=( (object@resolution == "base") | (object@resolution == "region") )
    check2=(ncol(data)==8)
    if(check1 & check2){
      return(TRUE)
    }
    else if (! check1 ){
        paste("resolution slot has to be either 'base' or 'region': other values not allowed")
    }
    else if(! check2){
        paste("data part of methylRaw have",ncol(data),"columns, expected 8 columns")
    }

}


#' An S4 class for holding raw methylation data from an alignment pipeline.
#'
#' This object stores the raw mehylation data that is read in through read function and extends \code{data.frame}.
#' The raw methylation data is basically percent methylation values and read coverage values per genomic region.
#'
#' @section Slots:\describe{
#'                  \item{\code{sample.id}:}{string for an identifier of the sample}
#'                  \item{\code{assembly}:}{string for genome assembly, ex: hg18,hg19,mm9}
#'                  \item{\code{context}:}{ methylation context string, ex: CpG,CpH,CHH, etc.}
#'                  \item{\code{resolution}:}{ resolution of methylation information, 'base' or 'region'}
#'                 }
#' @section Details:
#' \code{methylRaw} class extends \code{\link{data.frame}} class therefore providing novice and experienced R users with a data structure that is well known and ubiquitous in many R packages.
#' 
#' 
#' @section Accessors:
#' The following functions provides access to data slots of methylDiff:
#' \code{\link[methylKit]{getData}},\code{\link[methylKit]{getAssembly}},\code{\link[methylKit]{getContext}}
#' 
#' @section Coercion:
#'   \code{methylRaw} object can be coerced to \code{\link[GenomicRanges]{GRanges}} object via \code{\link{as}} function.
#' 
#' @examples
#' 
#' # example of a raw methylation data contained as a text file
#' read.table(system.file("extdata", "control1.myCpG.txt", package = "methylKit"),header=TRUE,nrows=5)
#' 
#' data(methylKit)
#' 
#' # example of a methylRaw object
#' head(methylRawList.obj[[1]])
#' str(head(methylRawList.obj[[1]]))
#' 
#' library(GenomicRanges)
#' 
#' #coercing methylRaw object to GRanges object
#' my.gr=as(methylRawList.obj[[1]],"GRanges")
#' 
#' @name methylRaw-class
#' @aliases methylRaw
#' @docType class
#' @rdname methylRaw-class
#' @export
setClass("methylRaw", contains= "data.frame",representation(
  sample.id = "character", assembly = "character",context="character",resolution="character"),validity=valid.methylRawObj)


#' An S4 class for holding a list of methylRaw objects.
#'
#' This class stores the list of  \code{\link[methylKit]{methylRaw}} objects.
#' Functions such as \code{lapply} can be used on this list. It extends \code{\link[base]{list}} class. This object is primarily produced
#' by \code{\link[methylKit]{read}} function.
#'
#' @section Slots:\describe{
#'                  \item{\code{treatment}}{numeric vector denoting control and test samples}
#'                  \item{\code{.Data}}{a list of \code{\link{methylRaw}} objects  } 
#'                }
#'                
#' @examples
#' data(methylKit)
#' 
#' #applying functions designed for methylRaw on methylRawList object
#' lapply(methylRawList.obj,"getAssembly")
#'
#' @name methylRawList-class
#' @aliases methylRawList
#' @docType class
#' @rdname methylRawList-class
#' @export
setClass("methylRawList", representation(treatment = "numeric"),contains = "list")

#' read file(s) to a methylrawList or methylraw object
#'
#' The function reads a list of files or files with methylation information for bases/region in the genome and creates a methylrawList or methylraw object
#' @param location file location(s), either a list of locations (each a character string) or one location string
#' @param sample.id sample.id(s)
#' @param assembly a string that defines the genome assembly such as hg18, mm9
#' @param header if the input file has a header or not (default: TRUE)
#' @param pipeline name of the alignment pipeline, it can be either "amp" or "bismark". The methylation text files generated from other pipelines can be read as generic methylation text files by supplying a named \code{\link[base]{list}} argument as "pipeline" argument.
#' The named \code{list} should containt column numbers which denotes which column of the text file corresponds to values and genomic location of the methylation events. See Details for more.
#' @param resolution designates whether methylation information is base-pair resolution or regional resolution. allowed values 'base' or 'region'. Default 'base'
#' @param treatment a vector contatining 0 and 1 denoting which samples are control which samples are test
#' @param context methylation context string, ex: CpG,CpH,CHH, etc. (default:CpG)
#' @usage read(location,sample.id,assembly,pipeline="amp",header=T, context="CpG",resolution="base",treatment)
#' @examples
#' 
#' # this is a list of example files, ships with the package
#' # for your own analysis you will just need to provide set of paths to files
#' #you will not need the "system.file(..."  part
#' file.list=list( system.file("extdata", "test1.myCpG.txt", package = "methylKit"),
#'                 system.file("extdata", "test2.myCpG.txt", package = "methylKit"),
#'                 system.file("extdata", "control1.myCpG.txt", package = "methylKit"),
#'                 system.file("extdata", "control2.myCpG.txt", package = "methylKit") )
#'
#' # read the files to a methylRawList object: myobj
#' myobj=read( file.list,
#'             sample.id=list("test1","test2","ctrl1","ctrl2"),assembly="hg18",treatment=c(1,1,0,0))
#'             
#' # read one file as methylRaw object
#' myobj=read( file.list[[1]],
#'             sample.id="test1",assembly="hg18")
#'             
#' # read a generic text file containing CpG methylation values
#' # let's first look at the content of the file
#' generic.file=system.file("extdata", "generic1.CpG.txt", package = "methylKit")
#' read.table(generic.file,header=TRUE)
#' 
#' # And this is how you can read that generic file as a methylKit object            
#'  myobj=read( generic.file,pipeline=list(fraction=FALSE, chr.col=1,start.col=2,end.col=2,coverage.col=4,strand.col=3,freqC.col=5),
#'             sample.id="test1",assembly="hg18")
#'             
#' @section Details:
#'  When \code{pipeline} argument is a list, it is exptected to provide a named list with following names.
#'  'fraction' is a logical value, denoting if the column frequency of Cs has a range from [0-1] or [0-100]. If true it assumes range is [0-1].
#'  'chr.col" is the number of the column that has chrosome string.   
#'  'start.col' is the number of the column that has start coordinate of the base/region of the methylation event.
#'  'end.col'  is the number of the column that has end coordinate of the base/region of the methylation event.
#'  'coverage.col' is the number of the column that has read coverage values. 
#'  'strand.col' is the number of the column that has strand information, the strand information in the file has to be in the form of '+' or '-', 
#'  'freqC.col' is the number of the column that has the frequency of Cs. See examples to see how to read a generic methylation text file.
#'  
#' @return returns methylRaw or methylRawList
#' 
#' @export
#' @docType methods
#' @rdname read-methods
setGeneric("read", function(location,sample.id,assembly,pipeline="amp",header=T, context="CpG",resolution="base",treatment) standardGeneric("read"))



#' @rdname read-methods
#' @aliases read,character,character,character-method
setMethod("read", signature(location = "character",sample.id="character",assembly="character"),
          
        function(location,sample.id,assembly,pipeline,header,context,resolution){ 
            if(! file.exists(location)){stop(location,", That file doesn't exist !!!")}
            data<- .readTableFast(location,header=header)    
            if(length(pipeline)==1 ){
              
              if(pipeline %in% c("amp","bismark") )
              {
              data<- .structureAMPoutput(data)
              }
              else{stop("unknown 'pipeline' argument, supported alignment pipelines: 'amp' or 'bismark' " )
                   }
              
            }
            else{
                .check.pipeline.list(pipeline)
                data<- .structureGeneric(data, pipeline)
            }

            obj=new("methylRaw",data,sample.id=sample.id,assembly=assembly,context=context,resolution="base")
            obj         
          }
)


# reads a list of CpG methylation files and makes methylRawList object
#
# @param a list containing locations(full paths) to CpG methylation files from alignment pipeline
# @param name a list of strings that defines the experiment
# @param assembly a string that defines the genome assembly such as hg18, mm9
# @param pipeline name of the alignment pipeline, currently only supports AMP (default: AMP), or for generic read, a list object contain \code{fraction}=TRUE/FALSE, \code{chr.col}, \code{strand.col}, \code{start.col}, \code{end.col}, \code{coverage.col},\code{freqC.col}, for example: \code{list(fraction=T, chr.col=1, strand.col=2, coverage.col=3, freqC.col=4, start.col=5, end.col=5)}  
# @param header if the input files has a header or not (default: TRUE)
# @param treatment a vector contatining 0 and 1 denoting which samples are control which samples are test
# @return returns a methylRawList object
#' @rdname read-methods
#' @aliases read,list,list,character-method
setMethod("read", signature(location = "list",sample.id="list",assembly="character"),
          function(location,sample.id,assembly,pipeline,header,context,resolution,treatment){ 
            
            #check if the given arugments makes sense
            if(length(location) != length(sample.id)){
              stop("length of 'location'  and 'name' should be same\n")
            }
            if( (length(treatment) != length(sample.id)) & (length(treatment) !=0) ){
              stop("length of 'treatment', 'name' and 'location' should be same\n")
            }
            
            # read each given location and record it as methylraw object
            outList=list()
            for(i in 1:length(location))
            {
              data<- .readTableFast(location[[i]],header=header)# read data
              if(length(pipeline)==1 )
              {
                if(pipeline %in% c("amp","bismark")){
                  data<- .structureAMPoutput(data)
                } else {
                  stop("pipeline length is equal to 1 and is not amp or bismark. If you do not have amp or bismark format, please give a parameter list containing the format information of the data. Please refer details in the read help page")
                }
              }
              else{
                #stop("unknown 'pipeline' argument, supported alignment pipelines: amp")
                .check.pipeline.list(pipeline)
                data<- .structureGeneric(data, pipeline)
              }
                  
              obj=new("methylRaw",data,sample.id=sample.id[[i]],assembly=assembly,context=context,resolution=resolution)
              outList[[i]]=obj       
            }
            myobj=new("methylRawList",outList,treatment=treatment)
            
            myobj
          })


#' filter methylRaw and methylRawList object based on read coverage
#'
#' This function filters \code{methylRaw} and \code{methylRawList} objects.
#' You can filter based on lower read cutoff or high read cutoff. Higher read cutoff is usefull to eliminate PCR effects
#' Lower read cutoff is usefull for doing better statistical tests.
#'
#' @param methylObj a \code{methylRaw} or \code{methylRawList} object
#' @param lo.count An integer for read counts.Bases/regions having lower coverage than this count is discarded
#' @param lo.perc  A double [0-100] for percentile of read counts. Bases/regions having lower coverage than this percentile is discarded
#' @param hi.count An integer for read counts. Bases/regions having higher coverage than this is count discarded
#' @param hi.perc A double [0-100] for percentile of read counts. Bases/regions having higher coverage than this percentile is discarded
#' @usage filterByCoverage(methylObj,lo.count=NULL,lo.perc=NULL,hi.count=NULL,hi.perc=NULL)
#' @examples
#' data(methylKit)
#' 
#' # filter out bases with covereage above 500 reads
#' filtered1=filterByCoverage(methylRawList.obj,lo.count=NULL,lo.perc=NULL,hi.count=500,hi.perc=NULL)
#' 
#' # filter out bases with cread coverage above 99.9th percentile of coverage distribution
#' filtered2=filterByCoverage(methylRawList.obj,lo.count=NULL,lo.perc=NULL,hi.count=NULL,hi.perc=99.9)
#' 
#' 
#' 
#' @return \code{methylRaw} or \code{methylRawList} object depending on input object
#' @export
#' @docType methods
#' @rdname filterByCoverage-methods
setGeneric("filterByCoverage",function(methylObj,lo.count=NULL,lo.perc=NULL,hi.count=NULL,hi.perc=NULL) standardGeneric("filterByCoverage") )

#' @aliases filterByCoverage,methylRaw-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRaw"),
                    function(methylObj,lo.count,lo.perc,hi.count,hi.perc){
                      if( is.null(lo.count) & is.null(lo.perc) & is.null(hi.count) & is.null(hi.perc) ){return(methylObj)}
                      
                      data=getData(methylObj) # get the data part
                      
                      #figure out which cut-offs to use, maybe there is more elagent ways, quick&dirty works for now
                      if(is.numeric(lo.count) ){lo.count=lo.count}
                      if(is.numeric(lo.perc)){lo.count=quantile(data$coverage,lo.perc/100)}
                      if(is.numeric(hi.count)){hi.count=hi.count}
                      if(is.numeric(hi.perc)){hi.count=quantile(data$coverage,hi.perc/100)}
                      
                      if(is.numeric(lo.count)){data=data[data$coverage>=lo.count,]}
                      if(is.numeric(hi.count)){data=data[data$coverage<hi.count,]}
                      

                      new("methylRaw",data,sample.id=methylObj@sample.id,
                                           assembly=methylObj@assembly,
                                           context=methylObj@context,resolution=methylObj@resolution)

                      
})

#' @aliases filterByCoverage,methylRawList-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRawList"),
                    function(methylObj,lo.count,lo.perc,hi.count,hi.perc){
                      new.list=lapply(methylObj,filterByCoverage,lo.count,lo.perc,hi.count,hi.perc)
                      new("methylRawList", new.list,treatment=methylObj@treatment)
})


#' An S4 class for methylation events sampled in multiple experiments
#'
#' This class is designed to contain methylation information such as coverage, number of methylated bases, etc.. 
#' The methylation events contained in the class must be sampled in multiple experiments (ex: only CpG bases covered in multiple experiments are stored in the object of this class).
#' The class extends \code{data.frame} and creates an object that holds methylation information and genomic location.
#' The object belonging to this class is produced by \code{\link{unite}} function.
#'          
#' @section Slots:\describe{
#'                  \item{\code{sample.ids}:}{character vector for ids of samples in the object}
#'
#'                  \item{\code{assembly}:}{name of the genome assembly}
#'
#'                  \item{\code{context}:}{context of methylation. Ex: CpG,CpH,CHH, etc}
#'
#'                  \item{\code{treatment}:}{treatment vector denoting which samples are test and control}
#'
#'                  \item{\code{coverage.index}:}{vector denoting which columns in the data correspons to coverage values}
#'
#'                  \item{\code{numCs.index}:}{vector denoting which columns in the data correspons to number of methylatedCs values}
#'                  \item{\code{numTs.index}:}{vector denoting which columns in the data correspons to number of unmethylated Cs values}
#'                  \item{\code{destranded}:}{ logical value. If \code{TRUE} object is destranded, if \code{FALSE} it is not.}
#'                  \item{\code{resolution}:}{ resolution of methylation information, allowed values: 'base' or 'region'}
#' }
#' 
#' @section Details:
#' \code{methylBase} class extends \code{\link{data.frame}} class therefore providing novice and experienced R users with a data structure that is well known and ubiquitous in many R packages.
#' 
#' 
#' @section Accessors:
#' The following functions provides access to data slots of methylDiff:
#' \code{\link[methylKit]{getData}},\code{\link[methylKit]{getAssembly}},\code{\link[methylKit]{getContext}}

#' 
#' @section Coercion:
#'   \code{methylBase} object can be coerced to \code{\link[GenomicRanges]{GRanges}} object via \code{\link{as}} function.
#' 
#' 
#' @examples
#' data(methylKit)
#' library(GenomicRanges)
#' my.gr=as(methylBase.obj,"GRanges")
#' 
#' @name methylBase-class
#' @aliases methylBase
#' @docType class
#' @rdname methylBase-class
#' @export
setClass("methylBase",contains="data.frame",representation(
  sample.ids = "character", assembly = "character",context = "character",treatment="numeric",coverage.index="numeric",
                                   numCs.index="numeric",numTs.index="numeric",destranded="logical",resolution = "character"))


#' unites methylRawList to a single table 
#' 
#' This functions unites \code{methylRawList} object that only bases with coverage from all samples are retained.
#' The resulting object is a class of \code{methylBase}
#'
#' @param .Object a methylRawList object to be merged by common locations covered by reads
#' @param destrand if TRUE, reads covering both strands of a CpG dinucleotide will be merged, 
#'   do not set to TRUE if not only interested in CpGs (default: FALSE). If the methylRawList object
#'   contains regions rather than bases setting destrand to TRUE will have no effect.
#' @param min.per.group an integer denoting minimum number of samples per replicate needed to cover a region/base. By default only regions/bases that are covered in all samples
#'        are united as methylBase object, however by supplying an integer for this argument users can control how many samples needed to cover region/base to be united as methylBase object.
#'       For example, if min.per.group set to 2 and there are 3 replicates per condition, the bases/regions that are covered in at least 2 replicates will be united and missing data for uncovered bases/regions will appear as NAs.
#'
#' @usage unite(.Object,destrand=FALSE,min.per.group=NULL)
#' @return a methylBase object
#' @aliases unite,-methods unite,methylRawList-method
#' @export
#' @examples
#' 
#'  data(methylKit)
#'  ## Following 
#'  my.methylBase=unite(methylRawList.obj) 
#'  my.methylBase=unite(methylRawList.obj,destrand=TRUE)
#'  
#' @docType methods
#' @rdname unite-methods
setGeneric("unite", function(.Object,destrand=FALSE,min.per.group=NULL) standardGeneric("unite"))

#' @rdname unite-methods
#' @aliases unite,methylRawList-method
setMethod("unite", "methylRawList",
                    function(.Object,destrand,min.per.group){
  
                    

                    
                     #check if assemblies,contexts and resolutions are same type NOT IMPLEMENTED   
                     if( length(unique(vapply(.Object,function(x) x@context,FUN.VALUE="character"))) > 1)
                     {
                       stop("supplied methylRawList object have different methylation contexts:not all methylation events from the same bases")
                     }
                     if( length(unique(vapply(.Object,function(x) x@assembly,FUN.VALUE="character"))) > 1)
                     {
                       stop("supplied methylRawList object have different genome assemblies")
                     }                     
                     if( length(unique(vapply(.Object,function(x) x@resolution,FUN.VALUE="character"))) > 1)
                     {
                       stop("supplied methylRawList object have different methylation resolutions:some base-pair some regional")
                     } 
                     
                     if( (!is.null(min.per.group)) &  ( ! is.integer( min.per.group ) )  ){stop("min.per.group should be an integer\ntry providing integers as 1L, 2L,3L etc.\n")}
                     
                     #merge raw methylation calls together
                     df=getData(.Object[[1]])
                     if(destrand & (.Object[[1]]@resolution == "base") ){df=.CpG.dinuc.unify(df)}
                     
                     sample.ids=c(.Object[[1]]@sample.id)
                     assemblies=c(.Object[[1]]@assembly)
                     contexts  =c(.Object[[1]]@context)
                     for(i in 2:length(.Object))
                     {
                        df2=getData(.Object[[i]])
                        if(destrand){df2=.CpG.dinuc.unify(df2)}
                        if( is.null(min.per.group) ){
                          df=merge(df,df2[,c(1,6:8)],by="id",suffixes=c(as.character(i-1),as.character(i) ) ) # merge the dat to a data.frame
                        }else{
                          df=merge(df,df2,by="id",suffixes=c(as.character(i-1),as.character(i) ) ,all=TRUE)
                        }
                        sample.ids=c(sample.ids,.Object[[i]]@sample.id)
                        contexts=c(contexts,.Object[[i]]@context)
                     }

                     # stop if the assembly of object don't match
                     if( length( unique(assemblies) ) != 1 ){stop("assemblies of methylrawList elements should be same\n")}
          

                    if(  ! is.null(min.per.group) ){
                      # if the the min.per.group argument is supplied, remove the rows that doesn't have enough coverage

                      # get indices of coverage,numCs and numTs in the data frame 
                      coverage.ind=seq(6,by=7,length.out=length(.Object))
                      numCs.ind   =coverage.ind+1
                      numTs.ind   =coverage.ind+2
                      start.ind   =seq(3,by=7,length.out=length(.Object)) # will be needed to weed out NA values on chr/start/end/strand
                      
                      for(i in unique(.Object@treatment) ){
                        my.ind=coverage.ind[.Object@treatment==i]
                        ldat = !is.na(df[,my.ind])
                        if(  is.null(dim(ldat))  ){  # if there is only one dimension
                          df=df[ldat>=min.per.group,]
                        }else{
                          df=df[rowSums(ldat)>=min.per.group,]
                        }
                      }
                      mat=df[,c(start.ind-1,start.ind,start.ind+1,start.ind+2)] # get all location columns, they are now duplicated with possible NA values
                      locs=t(apply(mat,1,function(x) unique(x[!is.na(x)]) ) ) # get location matrix
                      if(ncol(locs)==3){ # if the resolution is base
                        df[,c(2:5)]=data.frame(chr=locs[,1],start=as.numeric(locs[,2]),end=as.numeric(locs[,2]),strand=locs[,3])
                      }else{   # if the resolution is region
                        df[,c(2:5)]=data.frame(chr=locs[,1],start=as.numeric(locs[,2]),end=as.numeric(locs[,3]),strand=locs[,4])
                      }
                      start.ind   =seq(10,by=7,length.out=length(.Object)) # will be needed to weed out NA values on chr/start/end/strand
                      
                      df=df[,-c(start.ind-1,start.ind,start.ind+1,start.ind+2)]
                      names(df)[2:5]=c("chr","start","end","strand")
                    }
                     
                    # get indices of coverage,numCs and numTs in the data frame 
                    coverage.ind=seq(6,by=3,length.out=length(.Object))
                    numCs.ind   =seq(6,by=3,length.out=length(.Object))+1
                    numTs.ind   =seq(6,by=3,length.out=length(.Object))+2

                    # change column names
                    names(df)[coverage.ind]=paste(c("coverage"),1:length(.Object),sep="" )
                    names(df)[numCs.ind]   =paste(c("numCs"),1:length(.Object),sep="" )
                    names(df)[numTs.ind]   =paste(c("numTs"),1:length(.Object),sep="" )
                     
                    #make methylbase object and return the object
                    obj=new("methylBase",as.data.frame(df),sample.ids=sample.ids,
                             assembly=unique(assemblies),context=unique(contexts),
                             treatment=.Object@treatment,coverage.index=coverage.ind,
                             numCs.index=numCs.ind,numTs.index=numTs.ind,destranded=destrand,resolution=.Object[[1]]@resolution )
                     obj
                    }
          )
            

#' get correlation between samples in methylBase object
#' 
#' The functions returns a matrix of correlation coefficients and/or a set of scatterplots showing the relationship between samples
#' 
#' @param .Object a methylBase object 
#' @param method a character string indicating which correlation coefficient (or covariance) is to be computed (default:"pearson", other options are "kendall" and "spearman") 
#' @param plot scatterPlot if TRUE (default:FALSE) 
#' @return a correlation matrix object and plot scatterPlot
#' @usage getCorrelation(.Object,method="pearson",plot=FALSE)
#' @examples
#' 
#' data(methylKit)
#' getCorrelation(methylBase.obj,method="pearson",plot=FALSE)
#' 
#' @aliases getCorrelation,-methods getCorrelation,methylBase-method
#' @export
#' @docType methods
#' @rdname getCorrelation-methods
setGeneric("getCorrelation", function(.Object,method="pearson",plot=FALSE) standardGeneric("getCorrelation"))

#' @rdname getCorrelation-methods
#' @aliases getCorrelation-method
setMethod("getCorrelation", "methylBase",
                    function(.Object,method,plot){
                        meth.mat = getData(.Object)[, .Object@numCs.index]/(.Object[,.Object@numCs.index] + .Object[,.Object@numTs.index] )                                      
                        names(meth.mat)=.Object@sample.ids
                        
                        print( cor(meth.mat,method=method) )
                      
                        
                        panel.cor.pearson <- function(x, y, digits=2, prefix="", cex.cor, ...)
                        {
                          usr <- par("usr"); on.exit(par(usr))
                          par(usr = c(0, 1, 0, 1))
                          r <- abs(cor(x, y,method="pearson"))
                          txt <- format(c(r, 0.123456789), digits=digits)[1]
                          txt <- paste(prefix, txt, sep="")
                          if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
                          text(0.5, 0.5, txt, cex = cex.cor * r)
                        }

                        panel.cor.kendall <- function(x, y, digits=2, prefix="", cex.cor, ...)
                        {
                          usr <- par("usr"); on.exit(par(usr))
                          par(usr = c(0, 1, 0, 1))
                          r <- abs(cor(x, y,method="kendall"))
                          txt <- format(c(r, 0.123456789), digits=digits)[1]
                          txt <- paste(prefix, txt, sep="")
                          if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
                          text(0.5, 0.5, txt, cex = cex.cor * r)
                        }
                        
                        panel.cor.spearman <- function(x, y, digits=2, prefix="", cex.cor, ...)
                        {
                          usr <- par("usr"); on.exit(par(usr))
                          par(usr = c(0, 1, 0, 1))
                          r <- abs(cor(x, y,method="spearman"))
                          txt <- format(c(r, 0.123456789), digits=digits)[1]
                          txt <- paste(prefix, txt, sep="")
                          if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
                          text(0.5, 0.5, txt, cex = cex.cor * r)
                        }
                        
                        
                        
                        panel.my.smooth2<-function(x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 1, col.smooth = "darkgreen", span = 2/3, iter = 3, ...) 
                        {
                             par(new = TRUE)    #par(usr = c(usr[1:2], 0, 1.5) )
                            smoothScatter(x, y,colramp=colorRampPalette(topo.colors(100)), bg = bg)
                            ok <- is.finite(x) & is.finite(y)
                            if (any(ok)) 
                                lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),col = col.smooth, ...)
                                abline(lm(y[ok]~x[ok]), col="red")
                        }
                        
                        panel.my.smooth<-function(x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 0.3, col.smooth = "green", span = 2/3, iter = 3, ...) 
                        {
                            points(x, y, pch = 20, col = densCols(x,y,colramp=colorRampPalette(topo.colors(20))), bg = bg, cex = 0.1)
                            ok <- is.finite(x) & is.finite(y)
                            if (any(ok)){
                                lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),col = col.smooth, ...);
                                abline(lm(y[ok]~x[ok]), col="red")}
                        }
                        panel.hist <- function(x, ...)
                        {
                            usr <- par("usr"); on.exit(par(usr))
                            par(usr = c(usr[1:2], 0, 1.5) )
                            h <- hist(x, plot = FALSE)
                            breaks <- h$breaks; nB <- length(breaks)
                            y <- h$counts; y <- y/max(y)
                            rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
                        }
                        
                        if(plot)
                        {  
                         
                          if(method=="spearman")
                          { pairs(meth.mat, 
                              lower.panel=panel.my.smooth2, 
                              upper.panel=panel.cor.spearman,
                              diag.panel=panel.hist,main=paste(.Object@context, .Object@resolution ,method,"cor.") )
                          }
                          if(method=="kendall")
                          { pairs(meth.mat, 
                                  lower.panel=panel.my.smooth2, 
                                  upper.panel=panel.cor.kendall,
                                  diag.panel=panel.hist,main=paste(.Object@context, .Object@resolution ,method,"cor.") )
                          }
                          if(method=="pearson")
                          { pairs(meth.mat, 
                                  lower.panel=panel.my.smooth2, 
                                  upper.panel=panel.cor.pearson,
                                  diag.panel=panel.hist,main=paste(.Object@context, .Object@resolution ,method,"cor.") )
                          }
                          
                          
                        }
                    }  
 )



#' get coverage stats from methylRaw object
#' 
#' The function returns basic statistics about read coverage per base. It can also plot a histogram of read coverage values.
#' 
#' @param .Object a \code{methylRaw} object 
#' @param plot plot a histogram of coverage if TRUE (default:FALSE) 
#' @param both.strands do stats and plot for both strands if TRUE (default:FALSE)
#' @param labels should the bars of the histrogram have labels showing the percentage of values in each bin (default:TRUE)
#' @param ... options to be passed to \code{\link[graphics]{hist}} function
#' @usage getCoverageStats(.Object,plot=FALSE,both.strands=FALSE,labels=TRUE,...)
#' @examples
#' data(methylKit)
#' 
#' # gets coverage stats for the first sample in methylRawList.obj object
#' getCoverageStats(methylRawList.obj[[1]],plot=TRUE,both.strands=FALSE,labels=TRUE)
#' 
#' 
#' @return a summary of coverage statistics or plot a histogram of coverage
#' @aliases getCoverageStats,methylRaw
#' @export
#' @docType methods
#' @rdname getCoverageStats-methods
setGeneric("getCoverageStats", function(.Object,plot=FALSE,both.strands=FALSE,labels=TRUE,...) standardGeneric("getCoverageStats"))

#' @rdname getCoverageStats-methods
#' @aliases getCoverageStats,methylRaw-method
setMethod("getCoverageStats", "methylRaw",
                    function(.Object,plot,both.strands,labels,...){
                      
                      if(!plot){
                        qts=seq(0,0.9,0.1) # get quantiles
                        qts=c(qts,0.95,0.99,0.995,0.999,1)                          
                        
                        if(both.strands){       
                          plus.cov=.Object[.Object$strand=="+",]$coverage
                          mnus.cov=.Object[.Object$strand=="-",]$coverage
                          
                          cat("read coverage statistics per base\n\n")
                          cat("FORWARD STRAND:\n")
                          cat("summary:\n")
                          print( summary( plus.cov ) )
                          cat("percentiles:\n")
                          print(quantile( plus.cov,p=qts ))
                          cat("\n\n")
                          cat("REVERSE STRAND:\n")
                          cat("summary:\n")
                          print( summary( mnus.cov ) )
                          cat("percentiles:\n")
                          print(quantile( mnus.cov,p=qts ))
                          cat("\n")                          
                        }else{
                          
                          all.cov=.Object$coverage
                          
                          cat("read coverage statistics per base\n")
                          cat("summary:\n")
                          print( summary( all.cov ) )
                          cat("percentiles:\n")
                          print(quantile( all.cov,p=qts ))
                          cat("\n")
                        }
                        
                      }else{
                        if(both.strands){   
                          plus.cov=.Object[.Object$strand=="+",]$coverage
                          mnus.cov=.Object[.Object$strand=="-",]$coverage
                          
                          par(mfrow=c(1,2))
                          if(labels){
                            a=hist(log10(plus.cov),plot=F)
                            my.labs=as.character(round(100*a$counts/length(plus.cov),1))
                          }else{my.labs=F}
 
                          hist(log10(plus.cov),col="chartreuse4",
                               xlab=paste("log10 of read coverage per",.Object@resolution),
                               main=paste("Histogram of", .Object@context, "coverage: Forward strand"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)

                         if(labels){
                            a=hist(log10(mnus.cov),plot=F)
                            my.labs=as.character(round(100*a$counts/length(mnus.cov),1))
                          }else{my.labs=F}
                          a=hist(log10(mnus.cov),plot=F)
                          hist(log10(mnus.cov),col="chartreuse4",
                               xlab=paste("log10 of read coverage per",.Object@resolution),
                               main=paste("Histogram of", .Object@context, "coverage: Reverse strand"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)
 
                        }else{
                          all.cov= .Object$coverage
                         if(labels){
                           a=hist(log10(all.cov),plot=F)
                           my.labs=as.character(round(100*a$counts/length(all.cov),1))
                          }else{my.labs=F}                          

                          hist(log10(all.cov),col="chartreuse4",
                               xlab=paste("log10 of read coverage per",.Object@resolution),
                               main=paste("Histogram of", .Object@context, "coverage"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)

                        }
                        
                        
                      }
                        
                      
})

#' get Methylation stats from methylRaw object
#' 
#' The function returns basic statistics about % methylation per base/region. It can also plot a histogram of % methylation values.
#' 
#' @param .Object a \code{methylRaw} object 
#' @param plot plot a histogram of Methylation if TRUE (deafult:FALSE) 
#' @param both.strands do plots and stats for both strands seperately  if TRUE (deafult:FALSE)
#' @param labels should the bars of the histrogram have labels showing the percentage of values in each bin (default:TRUE)
#' @param ... options to be passed to \code{\link[graphics]{hist}} function.
#' @usage  getMethylationStats(.Object,plot=FALSE,both.strands=FALSE,labels=TRUE,...)
#' @examples
#' data(methylKit)
#' 
#' # gets coverage stats for the first sample in methylRawList.obj object
#' getMethylationStats(methylRawList.obj[[1]],plot=TRUE,both.strands=FALSE,labels=TRUE)
#'
#' @return a summary of Methylation statistics or plot a histogram of coverage
#' @export
#' @docType methods
#' @rdname getMethylationStats-methods
setGeneric("getMethylationStats", function(.Object,plot=FALSE,both.strands=FALSE,labels=TRUE,...) standardGeneric("getMethylationStats"))

#' @rdname getMethylationStats-methods
#' @aliases getMethylationStats,methylRaw-method
setMethod("getMethylationStats", "methylRaw",
                    function(.Object,plot,both.strands,labels,...){
                      
                      plus.met=100* .Object[.Object$strand=="+",]$numCs/.Object[.Object$strand=="+",]$coverage
                      mnus.met=100* .Object[.Object$strand=="-",]$numCs/.Object[.Object$strand=="-",]$coverage
                      all.met =100* .Object$numCs/.Object$coverage
                      
                      if(!plot){
                        qts=seq(0,0.9,0.1) # get quantiles
                        qts=c(qts,0.95,0.99,0.995,0.999,1)                          
                        
                        if(both.strands){       
                          
                          cat("methylation statistics per base\n\n")
                          cat("FORWARD STRAND:\n")
                          cat("summary:\n")
                          print( summary( plus.met ) )
                          cat("percentiles:\n")
                          print(quantile( plus.met,p=qts ))
                          cat("\n\n")
                          cat("REVERSE STRAND:\n")
                          cat("summary:\n")
                          print( summary( mnus.met ) )
                          cat("percentiles:\n")
                          print(quantile( mnus.met,p=qts ))
                          cat("\n")                          
                        }else{
                          
                          
                          cat("methylation statistics per base\n")
                          cat("summary:\n")
                          print( summary( all.met ) )
                          cat("percentiles:\n")
                          print(quantile( all.met,p=qts ))
                          cat("\n")
                        }
                        
                      }else{
                        if(both.strands){   
                          
                          par(mfrow=c(1,2))
                          if(labels){
                            a=hist((plus.met),plot=F)
                            my.labs=as.character(round(100*a$counts/length(plus.met),1))
                          }else{my.labs=FALSE}
                          hist((plus.met),col="cornflowerblue",
                               xlab=paste("% methylation per",.Object@resolution),
                               main=paste("Histogram of %", .Object@context,"methylation: Forward strand"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)

                          if(labels){                          
                            a=hist((mnus.met),plot=F)
                            my.labs=as.character(round(100*a$counts/length(mnus.met),1))
                          }
                          else{my.labs=FALSE}
                          
                          hist((mnus.met),col="cornflowerblue",
                               xlab=paste("% methylation per",.Object@resolution),
                               main=paste("Histogram of %", .Object@context,"methylation: Reverse strand"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)
 
                        }else{
                          if(labels){                          
                          
                            a=hist((all.met),plot=F)
                            my.labs=as.character(round(100*a$counts/length(all.met),1))
                          }else{my.labs=FALSE}
                          hist((all.met),col="cornflowerblue",
                               xlab=paste("% methylation per",.Object@resolution),
                               main=paste("Histogram of %", .Object@context,"methylation"),
                               labels=my.labs,...)
                          mtext(.Object@sample.id, side = 3)

                        }
                        
                        
                      }
                        
                      
})





# get distribution of difference between samples in methylBase object
# unites methylrawlist objects based on chromosomal positions of CpG dinucleotides
#setGeneric("getDifference", function(.Object,plot=F) standardGeneric("getCorrelation"))
#setMethod("getDifference", "methylBase",
#                    function(.Object,plot){
#                        meth.mat = .Object@data[, .Object@numCs.index]/(.Object@data[,.Object@numCs.index] + .Object@data[,.Object@numTs.index] )                                      
#                        names(meth.mat)=.Object@sample.ids                      
#                      
#                        ind=t(combn(1:4,2) )
#                        d.list=(meth.mat[,ind[1,1]]-meth.mat[,ind[1,2]])
#                        for(i in 2:nrow(ind))
#                        {
#                          d.list=cbind(d.list,meth.mat[,ind[i,1]]-meth.mat[,ind[i,2]])
#                        }
#                    }                       
#                  )      
                        
#
#  methylBase accessor functions
#


#' get assembly of the genome
#' 
#' The function returns the genome assembly stored in any of the \code{\link{methylBase}},\code{\link{methylRaw}},\code{\link{methylDiff}} objects
#' 
#' @param x an \code{\link{methylBase}},\code{\link{methylRaw}} or \code{\link{methylDiff}} object
#' @usage getAssembly(x)
#' @examples
#' 
#' data(methylKit)
#' 
#' getAssembly(methylBase.obj)
#' getAssembly(methylDiff.obj)
#' getAssembly(methylRawList.obj[[1]])
#' 
#' 
#' @return the assembly string for the object
#' @export
#' @docType methods
#' @rdname getAssembly-methods
setGeneric("getAssembly", def=function(x) standardGeneric("getAssembly"))

#' @rdname getAssembly-methods
#' @aliases getAssembly,methylBase-method
setMethod("getAssembly", signature="methylBase", definition=function(x) {
                return(x@assembly)
        }) 

#' @rdname getAssembly-methods
#' @aliases getAssembly,methylRaw-method
setMethod("getAssembly", signature="methylRaw", definition=function(x) {
                return(x@assembly)
        })

#' get the context of methylation
#' 
#' The function returns the context of methylation. For example: "CpG","CHH" or "CHG"
#' 
#' @param x an \code{\link{methylBase}},\code{\link{methylRaw}} or an \code{\link{methylDiff}} object
#' @usage getContext(x)
#' @examples 
#' 
#' data(methylKit)
#' 
#' getContext(methylBase.obj)
#' getContext(methylDiff.obj)
#' getContext(methylRawList.obj[[1]])
#'
#' @return a string for the context methylation 
#' @export
#' @docType methods
#' @rdname getContext-methods
setGeneric("getContext", def=function(x) standardGeneric("getContext"))

#' @rdname getContext-methods
#' @aliases getContext,methylBase-method
setMethod("getContext", signature="methylBase", definition=function(x) {
                return(x@context)
        })

#' @rdname getContext-methods
#' @aliases getContext,methylRaw-method
setMethod("getContext", signature="methylRaw", definition=function(x) {
                return(x@context)
        })



#' get the data slot from the methylKit objects
#' 
#' The functions retrieves the table containing methylation information from \code{methylKit} Objects.
#' The data retrived from this function is of a \code{\link{data.frame}}. This is basically containing all relevant methylation information per genomic region or base.
#'
#' @param x an \code{\link{methylBase}},\code{\link{methylRaw}} or \code{\link{methylDiff}} object
#' @usage getData(x)
#' @examples
#' data(methylKit)
#' 
#' # following commands show first lines of returned data.frames from getData() function
#' head(
#' getData(methylBase.obj)
#' )
#' 
#' head( getData(methylDiff.obj))
#'
#' head(getData(methylRawList.obj[[1]]))
#' 
#' 
#' @return data.frame for methylation events
#' @aliases getData,-methods getData,methylBase-method
#' @export
#' @docType methods
#' @rdname getData-methods
setGeneric("getData", def=function(x) standardGeneric("getData"))

#' @rdname getData-methods
#' @aliases getData-method
setMethod("getData", signature="methylBase", definition=function(x) {
                return(as(x,"data.frame"))
}) 

#' @rdname getData-methods
#' @aliases getData,methylRaw-method
setMethod("getData", signature="methylRaw", definition=function(x) {
                return(as(x,"data.frame"))
})

## CONVERTOR FUNCTIONS FOR methylRaw and methylBase OBJECT
#convert methylRaw to GRanges
setAs("methylRaw", "GRanges", function(from)
                      {
                        from2=getData(from)
                        GRanges(seqnames=from2$chr,ranges=IRanges(start=from2$start, end=from2$end),
                                       strand=from2$strand, 
                                       id=from2$id,
                                       coverage=from2$coverage,
                                       numCs   =from2$numCs,
                                       numTs  =from2$numTs                                
                                       )

})

setAs("methylBase", "GRanges", function(from)
                      {
                        #from=getData(from1)
                        GRanges(seqnames=from$chr,ranges=IRanges(start=from$start, end=from$end),
                                       strand=from$strand, 
                                       id=from$id,
                                       data.frame(from[,6:ncol(from)])
                                       )

})

### subset methylBase and methylRaw objects

#' selects rows from of methylKit objects
#'
#' The function returns a subset of data contained in the \code{methylKit} objects.
#' 
#' @param x an \code{\link{methylBase}},\code{\link{methylRaw}} or \code{\link{methylDiff}} object
#' @param i a numeric or logical vector. This vector corresponds to bases or regions contained in \code{methylKit} objects.
#'            The vector is used to subset the data.  
#' @usage select(x,i)
#' @examples
#' data(methylKit)
#' subset1=select(methylRawList.obj[[1]],1:100) # selects first hundred rows, returns a methylRaw object
#' subset2=select(methylBase.obj,1:100) # selects first hundred rows, returns a methylBase object
#' subset3=select(methylDiff.obj,1:100) # selects first hundred rows, returns a methylDiff object
#' 
#' @return a \code{\link{methylBase}},\code{\link{methylRaw}} or \code{\link{methylDiff}} object depending on the input object.
#' @export
#' @docType methods
#' @rdname select-methods
setGeneric("select", def=function(x,i) standardGeneric("select"))




#' @aliases select,methylBase-method
#' @rdname select-methods
setMethod("select", "methylBase",
          function(x, i)
          {

            new("methylBase",getData(x)[i,],
                sample.ids = x@sample.ids, 
                assembly = x@assembly,
                context = x@context,
                treatment=x@treatment,
                coverage.index=x@coverage.index,
                numCs.index=x@numCs.index,
                numTs.index=x@numTs.index,
                destranded=x@destranded,
                resolution =x@resolution)
           }
)



#' @aliases select,methylRaw-method
#' @rdname select-methods
setMethod("select", "methylRaw",
          function(x, i)
          {

          new("methylRaw",getData(x)[i,],sample.id=x@sample.id,
                                           assembly=x@assembly,
                                           context=x@context,resolution=x@resolution)
           }
          

)

